{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "36a877ae",
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "source": [
    "# Language Models\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8b60dead",
   "metadata": {
    "attributes": {
     "classes": [],
     "id": "",
     "n": "3"
    },
    "execution": {
     "iopub.execute_input": "2023-08-18T19:28:41.375771Z",
     "iopub.status.busy": "2023-08-18T19:28:41.375138Z",
     "iopub.status.idle": "2023-08-18T19:28:44.315880Z",
     "shell.execute_reply": "2023-08-18T19:28:44.314922Z"
    },
    "origin_pos": 3,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from d2l import torch as d2l"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "288638d1",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Read minibatches of input sequences and target sequences at random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7cbf8e11",
   "metadata": {
    "attributes": {
     "classes": [],
     "id": "",
     "n": "7"
    },
    "execution": {
     "iopub.execute_input": "2023-08-18T19:28:44.338600Z",
     "iopub.status.busy": "2023-08-18T19:28:44.338043Z",
     "iopub.status.idle": "2023-08-18T19:28:45.209913Z",
     "shell.execute_reply": "2023-08-18T19:28:45.208778Z"
    },
    "origin_pos": 11,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading ../data/timemachine.txt from http://d2l-data.s3-accelerate.amazonaws.com/timemachine.txt...\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X: tensor([[10,  4,  2, 21, 10, 16, 15,  0, 20,  2],\n",
      "        [21,  9,  6, 19,  0, 24,  2, 26,  0, 16]]) \n",
      "Y: tensor([[ 4,  2, 21, 10, 16, 15,  0, 20,  2, 10],\n",
      "        [ 9,  6, 19,  0, 24,  2, 26,  0, 16,  9]])\n"
     ]
    }
   ],
   "source": [
    "@d2l.add_to_class(d2l.TimeMachine)  \n",
    "def __init__(self, batch_size, num_steps, num_train=10000, num_val=5000):\n",
    "    super(d2l.TimeMachine, self).__init__()\n",
    "    self.save_hyperparameters()\n",
    "    corpus, self.vocab = self.build(self._download())\n",
    "    array = torch.tensor([corpus[i:i+num_steps+1]\n",
    "                        for i in range(len(corpus)-num_steps)])\n",
    "    self.X, self.Y = array[:,:-1], array[:,1:]\n",
    "\n",
    "@d2l.add_to_class(d2l.TimeMachine)  \n",
    "def get_dataloader(self, train):\n",
    "    idx = slice(0, self.num_train) if train else slice(\n",
    "        self.num_train, self.num_train + self.num_val)\n",
    "    return self.get_tensorloader([self.X, self.Y], train, idx)\n",
    "\n",
    "data = d2l.TimeMachine(batch_size=2, num_steps=10)\n",
    "for X, Y in data.train_dataloader():\n",
    "    print('X:', X, '\\nY:', Y)\n",
    "    break"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "language_info": {
   "name": "python"
  },
  "required_libs": [],
  "rise": {
   "autolaunch": true,
   "enable_chalkboard": true,
   "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
   "scroll": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}